library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
# For adding new columns
library(tibble)
library(MuMIn)# Load data
sys.source("./scripts/code_join_data_full_dataset.R", envir = knitr::knit_global())# Load functions
## Models
sys.source("./R/functions_models.R", envir = knitr::knit_global())
sys.source("./R/function_nlme_validation_plots.R", envir = knitr::knit_global())Model Fitting
Models: Questions 1,2 and Mass fractions
Select performance and traits variables
data_for_models <-
data_for_models %>%
# Select variables for analysis
dplyr::select(c(1:7,9,11:ncol(data_for_models)))\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]
# Take response variables names
response_vars_q1_q2 <-
set_names(names(data_for_models)[c(5:11, 12:15,17)])
response_vars_q1_q2 total_biomass above_biomass below_biomass rgr rmf
"total_biomass" "above_biomass" "below_biomass" "rgr" "rmf"
smf lmf amax gs wue
"smf" "lmf" "amax" "gs" "wue"
d13c pnue
"d13c" "pnue"
models_q1_q2_mass_fractions <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x,
data = data_for_models))# Log models
model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
model_rmf_log <- lmer(log(rmf) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
model_smf_log <- lmer(log(smf) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
model_lmf_log <- lmer(log(lmf) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models) # Append log models to model list Q1 Q2
log_models <- list(model_q2_wue_log, model_q2_pnue_log,
model_rmf_log, model_smf_log, model_lmf_log)
names(log_models) <- c("wue_log", "pnue_log",
"rmf_log", "smf_log", "lmf_log")
models_q1_q2_mass_fractions <- append(log_models, models_q1_q2_mass_fractions)
length(models_q1_q2_mass_fractions)[1] 17
names(models_q1_q2_mass_fractions) [1] "wue_log" "pnue_log" "rmf_log" "smf_log"
[5] "lmf_log" "total_biomass" "above_biomass" "below_biomass"
[9] "rgr" "rmf" "smf" "lmf"
[13] "amax" "gs" "wue" "d13c"
[17] "pnue"
Model: Nodule count
- Chapter 9 Mixed models in ecology check glmmML package for count data
- GOOD ref https://www.dataquest.io/blog/tutorial-poisson-regression-in-r/
- #https://www.flutterbys.com.au/stats/tut/tut11.2a.html
# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")
# Delete unused variables
data_nodules_cleaned <-
data_nodules_cleaned %>%
# add id to rownames for keep track of the rows
column_to_rownames("id") %>%
dplyr::select(spcode,treatment, everything())lmer gaussian
lmer_gaussian <- lmer(number_of_root_nodulation ~ treatment + init_height +
(1 |spcode),data = data_nodules_cleaned)lmer gaussian log
lmer_gaussian_log <- lmer(log(number_of_root_nodulation) ~ treatment + init_height +
(1 |spcode),data = data_nodules_cleaned)Compare models
# Based on this output I decided to use the log model
MuMIn::model.sel(lmer_gaussian, lmer_gaussian_log)Model selection table
(Int) int_hgh trt df logLik AICc delta weight
lmer_gaussian_log 2.943 0.05571 + 7 -35.282 87.4 0.00 1
lmer_gaussian 38.350 1.67500 + 7 -195.570 407.9 320.58 0
Models ranked by AICc(x)
Random terms (all models):
1 | spcode
AIC(lmer_gaussian, lmer_gaussian_log) df AIC
lmer_gaussian 7 405.13948
lmer_gaussian_log 7 84.56385
Improving log model
## After seeing the validation plots for the lmer_gaussian_log I decided to try
## to improve the log model using the nlme package
nlme_gaussian_log <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
random = ~1|spcode,
data = data_nodules_cleaned)
nlme_nodule_log_weights <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
random = ~1|spcode,
weights = varIdent(form = ~1|spcode),
data = data_nodules_cleaned)Compare models
# Check if the nlme_gaussian_log_weights is a better model
# Compare between nlme objects
anova(nlme_gaussian_log, nlme_nodule_log_weights ) Model df AIC BIC logLik Test L.Ratio
nlme_gaussian_log 1 7 84.56385 96.89225 -35.28193
nlme_nodule_log_weights 2 9 62.79085 78.64165 -22.39542 1 vs 2 25.77301
p-value
nlme_gaussian_log
nlme_nodule_log_weights <.0001
model.sel(nlme_gaussian_log, lmer_gaussian_log, nlme_nodule_log_weights)Model selection table
(Int) int_hgh trt class weights df logLik AICc
nlme_nodule_log_weights 3.344 0.03767 + lme vrI(spc) 9 -22.395 67.5
lmer_gaussian_log 2.943 0.05571 + lmerMod 7 -35.282 87.4
nlme_gaussian_log 2.943 0.05571 + lme 7 -35.282 87.4
delta weight
nlme_nodule_log_weights 0.00 1
lmer_gaussian_log 19.84 0
nlme_gaussian_log 19.84 0
Abbreviations:
weights: vrI(spc) = 'varIdent(~1|spcode)'
Models ranked by AICc(x)
Random terms (all models):
1 | spcode
AIC(lmer_gaussian_log, nlme_gaussian_log, nlme_nodule_log_weights) df AIC
lmer_gaussian_log 7 84.56385
nlme_gaussian_log 7 84.56385
nlme_nodule_log_weights 9 62.79085
# Model improved, check assumptions for nlme_gaussian_log_weights Models: Question 3
\[performance\sim treatment:fixer:scaled(trait)\ + initial\ height + random( 1|\ specie)\]
Scale preditors
data_for_models_scaled <-
data_for_models %>%
mutate(across(c(4,9:14),scale)) Model Aboveground biomass
lme_above_biom_3way <- lme(above_biomass ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
init_height ,
random = ~1 | spcode,
data = data_for_models_scaled)Model Belowground biommass
lme_below_biom_3way <- lme(below_biomass ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
init_height ,
random = ~1 | spcode,
data = data_for_models_scaled)Model RGR
lme_rgr_3way <- lme(rgr ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
init_height ,
random = ~1 | spcode,
data = data_for_models_scaled)Model Total Biomass
lme_total_biom_3way <- lme(total_biomass ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
init_height,
random = ~1 | spcode,
data = data_for_models_scaled)Model Validation
Validation of models for questions 1,2 and Mass fractions
Collinearity
map(models_q1_q2_mass_fractions, check_collinearity)$wue_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.30 1.14 0.77
treatment 3.85 1.96 0.26
init_height 1.05 1.02 0.96
nfixer:treatment 4.57 2.14 0.22
$pnue_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.20 1.09 0.84
treatment 3.85 1.96 0.26
init_height 1.05 1.03 0.95
nfixer:treatment 4.31 2.08 0.23
$rmf_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.04 1.02 0.96
treatment 3.87 1.97 0.26
init_height 1.06 1.03 0.94
nfixer:treatment 3.94 1.98 0.25
$smf_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.02 1.01 0.98
treatment 3.88 1.97 0.26
init_height 1.07 1.03 0.94
nfixer:treatment 3.88 1.97 0.26
$lmf_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.05 1.02 0.95
treatment 3.87 1.97 0.26
init_height 1.06 1.03 0.94
nfixer:treatment 3.95 1.99 0.25
$total_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.86 1.97 0.26
nfixer 1.12 1.06 0.90
init_height 1.06 1.03 0.95
treatment:nfixer 4.12 2.03 0.24
$above_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.06 1.03 0.94
init_height 1.06 1.03 0.94
treatment:nfixer 3.98 2.00 0.25
$below_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.86 1.96 0.26
nfixer 1.16 1.08 0.86
init_height 1.05 1.03 0.95
treatment:nfixer 4.22 2.06 0.24
$rgr
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.05 1.02 0.95
init_height 1.06 1.03 0.94
treatment:nfixer 3.96 1.99 0.25
$rmf
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.03 1.02 0.97
init_height 1.06 1.03 0.94
treatment:nfixer 3.92 1.98 0.25
$smf
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.06 1.03 0.94
init_height 1.06 1.03 0.94
treatment:nfixer 3.99 2.00 0.25
$lmf
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.04 1.02 0.96
init_height 1.06 1.03 0.94
treatment:nfixer 3.94 1.98 0.25
$amax
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.88 1.97 0.26
nfixer 1.03 1.01 0.97
init_height 1.07 1.03 0.94
treatment:nfixer 3.90 1.98 0.26
$gs
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.83 1.96 0.26
nfixer 1.92 1.39 0.52
init_height 1.03 1.01 0.97
Moderate Correlation
Term VIF Increased SE Tolerance
treatment:nfixer 6.08 2.47 0.16
$wue
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.85 1.96 0.26
nfixer 1.19 1.09 0.84
init_height 1.05 1.03 0.95
treatment:nfixer 4.29 2.07 0.23
$d13c
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.85 1.96 0.26
nfixer 1.20 1.09 0.84
init_height 1.05 1.03 0.95
treatment:nfixer 4.31 2.07 0.23
$pnue
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.85 1.96 0.26
nfixer 1.30 1.14 0.77
init_height 1.05 1.02 0.96
treatment:nfixer 4.55 2.13 0.22
# No problems found here
map(models_q1_q2_mass_fractions, check_model)$wue_log
$pnue_log
$rmf_log
$smf_log
$lmf_log
$total_biomass
$above_biomass
$below_biomass
$rgr
$rmf
$smf
$lmf
$amax
$gs
$wue
$d13c
$pnue
Validation of nodule count models
Checking log nodule count model’s assumptions
check_model(lmer_gaussian_log)nlme_validation_plots(lmer_gaussian_log, data = data_nodules_cleaned,
group = "spcode", variables = c("treatment"))Checking improved nlme log model assumptions
Zuur et al pp 84:
“Note that these residuals still show heterogeneity, but this is now allowed (because the residual variation differs depending on the chosen variance structure and values of the variance covariate). Hence, these residuals are less useful for the model validation process.”
nlme_validation_plots(nlme_nodule_log_weights, data = data_nodules_cleaned,
group = "spcode", variables = c("treatment") )Validation of models for question 3
Aboveground biomass
nlme_validation_plots(lme_above_biom_3way, data = data_for_models_scaled,
group = "spcode")[1] "No variable specified inthe variables argument"
Belowground biomass
nlme_validation_plots(lme_below_biom_3way, data = data_for_models_scaled,
group = "spcode")[1] "No variable specified inthe variables argument"
RGR biomass
nlme_validation_plots(lme_rgr_3way, data = data_for_models_scaled, group = "spcode")[1] "No variable specified inthe variables argument"
Total biomass
nlme_validation_plots(lme_total_biom_3way, data = data_for_models_scaled,
group = "spcode")[1] "No variable specified inthe variables argument"
Save model lists as .RData
Models for Q1, Q2 and Mass fractions
models_q1_q2_mass_fractions <-
models_q1_q2_mass_fractions %>%
# Remove models that showed violation of the assumptions
purrr::list_modify("pnue" = NULL, "wue" = NULL,
"rmf" = NULL, "smf" = NULL, "lmf" = NULL)Nodule count
models_nodule_count <- list(nlme_gaussian_log, nlme_nodule_log_weights)
names(models_nodule_count) <- c("nlme_gaussian_log", "nlme_gaussian_log_weights")3-way models
models_3way_q3<- list(lme_above_biom_3way, lme_below_biom_3way,
lme_rgr_3way, lme_total_biom_3way)
names(models_3way_q3) <- c("lme_above_biom_3way", "lme_below_biom_3way",
"lme_rgr_3way", "lme_total_biom_3way")saveRDS(models_q1_q2_mass_fractions, file = "./processed_data/models_q1_q2_and_mass_fractions.RData")
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData")
saveRDS(models_3way_q3, file = "./processed_data/models_3way_q3.RData")